Notes: Data: 2020-11-03_Switzerland_upto_2018-11-31, Switzerland_2018-12-01_2020-11-03. Pre-processing: After gathering the data, the two datasets were merged and it was cleaned the duplicates and empty values, specially for dates

PACKAGES

1. RAW DATA PROCESSING

#CLEANING PROCESS TO JOIN GATHERED DATA FROM FLICKR

#data <- read.csv("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/2020-11-03_Switzerland_upto_2018-11-31.csv", encoding = "UTF-8" )
#saveRDS(data, "//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Compile_2018-11-03.rds")

#data2 <- read.csv("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Switzerland_2018-12-01_2020-11-03.csv", encoding = "UTF-8")
#saveRDS(data2, "//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Update_2020-11-03.rds")

# export dataframes to backup the main tables
#d1 <- readRDS("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Compile_2018-11-03.rds")
#d2 <- readRDS("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Update_2020-11-03.rds")

#Join two dataset in a single main dataframe
#raw <-  dplyr::bind_rows(d1, d2)

#raw$tags <- gsub(";", "  ", raw$tags) # Remove symbol in column tags
#raw$post_title <- gsub(";", "  ", raw$post_title) # Remove symbol in column post_title
#raw$post_body <- gsub(";", "  ", raw$post_body) # Remove symbol in column post_body

#raw_clean <- subset(raw, select= c("post_guid", "longitude", "latitude", "user_guid", "post_create_date", "post_publish_date", "tags", "post_title", "post_body", "place_guid"))

#raw_clean <- distinct(raw_clean, post_guid, .keep_all = TRUE)
#raw_clean <- filter(raw_clean, longitude >= 0.0001) 

#Export the total table for future applications
#write.table(raw_clean,"~/GitHub/Flickr_SwissParks/Join2018-2020.csv", sep=";", dec=".")

1.1 FUNCTIONS

# define root folders for data
root_folder <- '~/GitHub/Flickr_SwissParks/'    # local folder 
setwd<- '~/GitHub/Flickr_SwissParks/'

# set coordinate referencing system (for changing CRS but not reprojecting)
crs_wgs84 <- "+init=epsg:4326"  # lat/lng
crs_sng <- "+init=epsg:2056"    # Swiss National, CH1903+ / LV95

# set coordinate referencing system (for changing CRS but not reprojecting)
crs_wgs84 <- "+init=epsg:4326"  # lat/lng
crs_osm <- "+init=epsg:3857"    # OSM projection
crs_bng <- "+init=epsg:27700"   # British National Grid, BNG
crs_sng <- "+init=epsg:2056"    # Swiss National, CH1903+ / LV95

# Set coordinate systems for reprojecting
proj_wgs84 <- '+proj=longlat +datum=WGS84'
proj_osm <- '+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0.0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs'
proj_bng <- '+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs'
proj_sng <- '+proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs'

# set point at which plot will switch from standard form
options(scipen=7)

# chart theme
t <- theme_bw() + 
  theme(panel.border=element_blank(),    # removes border around chart area
        axis.text = element_text(size = 10, colour='#444444'),
        axis.ticks = element_line(colour='gray'))

# reproject from lat/lng (WGS84) to another CRS
# ---------------------------------------------
# requires columns labeled 'lat', 'lng' in first two columns, and at least two other columns (4 cols minimum)
reproject <- function(df, proj_crs) {
  
  dataset_map_coords <- df[c("lng", "lat")]  # specifying names to avoid mixing x and y up in order...
  dataset_map_data <- df[ ,c(3:ncol(df))]    
  dataset_map <- SpatialPointsDataFrame(coords=dataset_map_coords, data=dataset_map_data)
  
  # set CRS and reproject to OSM for OSM number system
  dataset_map@proj4string # check first, should be NA - not yet been set
  proj4string(dataset_map) <- CRS(crs_wgs84)  # set the current coordinates system
  dataset_map <- spTransform(dataset_map, CRS(proj_crs))  # reproject to new CRS
  
  # convert back to dataframe with reprojected cooordinates and then amend to original dataset
  newdf <- as.data.frame(dataset_map)
  names(newdf)[names(newdf) == "lng"] <- 'crs_x'; 
  names(newdf)[names(newdf) == "lat"] <- 'crs_y'        
  newdf <- newdf[, c("crs_x", "crs_y")]   # just want to keep the reprojected coordinates to append back to dataset
  df <- cbind(df, newdf)
  return(df)
}


# load in the data files and standardise lat/lng column name for reproject
# ---------------------------------------
# load data file, rename coordinates columns
loaddata <- function(input_file) {
  
  folder <- paste0(root_folder, '')
  input_data <- paste0(folder, input_file)
  df <- read.csv(input_data, sep = ";", na.strings="0")
  
  # rename latitude and longitude columns to lat and lng
  names(df)[names(df) == "latitude"] <- "lat"
  names(df)[names(df) == "longitude"] <- "lng"
  
  return(df)
}

1.2 LOAD DATA

#Run the functions with the raw data
input_file <- 'Join2018-2020.csv'
orig <- loaddata(input_file)
orig$lat <- as.numeric(as.character(orig$lat))
orig$lng <- as.numeric(as.character(orig$lng))
orig$is_na = ifelse(is.na(orig$lat), TRUE, FALSE)
orig<-orig[!(orig$is_na=="TRUE"),]

1.2.1 PREPARATION OF THE DATASET

# Drop unneeded columns and rows to slim the dataset
# -----------------------------------------

#### a. Only pictures taken after 2004 (inclusive)
# Creating a subset of the data, just for picture that were taken after 2004
df <- orig
df$post_create <- as.Date(df$post_create_date)
reduced_04 <- filter(df, post_create >= "2004-01-01") # we create a new dataframe just for the data after 2004
reduced_04 <- filter(reduced_04, post_create <= "2020-11-03") # we create a new dataframe just for the data after 2004
trimmed <- reduced_04
rm(df)

# add in reprojection to CH1903+ / LV95
# -------------------------------------
# slice off min. fields to acquire reprojection coords, then rejoin to dataset
# order matters: need lat and lng in first two fields + two other fields for data (any will do)
df <- trimmed[, c(3, 2, 1, 4:10)]
proj_crs <- proj_sng  # reproject from WGS84 to Swiss nat.coords 
df <- reproject(df, proj_crs)
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on Bessel 1841 ellipsoid in Proj4
## definition
df <- df[, c(11:12)]  # just keep the new coords and then merge back into source
trimmed <- cbind(trimmed, df)
trimmed <- trimmed[, c(1, 4:10, 13,14)]
rm(df)

# rename latitude and longitude columns to lat and lng
names(trimmed)[names(trimmed) == "crs_y"] <- "lat"
names(trimmed)[names(trimmed) == "crs_x"] <- "lng"
df <-  trimmed

1.2.2 SNIP TO DATA WITHIN SWISS BORDERS

# create SpatialPointsPolygon for data
df_map_coords <- df[c("lng", "lat")]
df_map_data <- df[ ,c(1:8)]
df_map <- SpatialPointsDataFrame(coords=df_map_coords, data=df_map_data)
proj4string(df_map) <- CRS(proj_sng)
df_map_St <- st_as_sf(df_map)

# preparation of shapefiles of land cover, cantonal limits and parks polygons.
parks <- readOGR(dsn="//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/GIS/Swiss_Parks.shp", use_iconv=TRUE, encoding="UTF-8")
## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS
## = dumpSRS, : Discarded datum CH1903+ in Proj4 definition: +proj=somerc
## +lat_0=46.9524055555556 +lon_0=7.43958333333333 +k_0=1 +x_0=2600000 +y_0=1200000
## +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs
## OGR data source with driver: ESRI Shapefile 
## Source: "\\files.geo.uzh.ch\shared\group\geocomp\jort_franziska_daniela\GIS\Swiss_Parks.shp", layer: "Swiss_Parks"
## with 29 features
## It has 13 fields
## Integer64 fields read as strings:  OBJECTID Rechtsgrun
## Warning in readOGR(dsn = "//files.geo.uzh.ch/shared/group/geocomp/
## jort_franziska_daniela/GIS/Swiss_Parks.shp", : Z-dimension discarded
parks <- spTransform(parks, CRS(proj_sng))
landcover <- readOGR("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/GIS/LandCover_Parks.shp", use_iconv=TRUE, encoding="UTF-8")
## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS
## = dumpSRS, : Discarded datum CH1903+ in Proj4 definition: +proj=somerc
## +lat_0=46.9524055555556 +lon_0=7.43958333333333 +k_0=1 +x_0=2600000 +y_0=1200000
## +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs
## OGR data source with driver: ESRI Shapefile 
## Source: "\\files.geo.uzh.ch\shared\group\geocomp\jort_franziska_daniela\GIS\LandCover_Parks.shp", layer: "LandCover_Parks"
## with 39308 features
## It has 5 fields
## Integer64 fields read as strings:  OBJECTID Id gridcode
landcover <- spTransform(landcover, CRS(proj_sng))
kanton <- readOGR("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/GIS/Swiss_Kanton.shp", use_iconv=TRUE, encoding="UTF-8")
## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS
## = dumpSRS, : Discarded datum CH1903+ in Proj4 definition: +proj=somerc
## +lat_0=46.9524055555556 +lon_0=7.43958333333333 +k_0=1 +x_0=2600000 +y_0=1200000
## +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +vunits=m
## +no_defs
## OGR data source with driver: ESRI Shapefile 
## Source: "\\files.geo.uzh.ch\shared\group\geocomp\jort_franziska_daniela\GIS\Swiss_Kanton.shp", layer: "Swiss_Kanton"
## with 50 features
## It has 20 fields
## Integer64 fields read as strings:  ERSTELL_J REVISION_J HERKUNFT_J KANTONSNUM EINWOHNERZ
## Warning in readOGR("//files.geo.uzh.ch/shared/group/geocomp/
## jort_franziska_daniela/GIS/Swiss_Kanton.shp", : Z-dimension discarded
kanton <- spTransform(kanton, CRS(proj_sng))


#Convert a geographical object in a simple feature with geometry
#Reduce the time of processing, making efficient the process of intersection
parks_St <- st_as_sf(parks)  
landcover_St <- st_as_sf(landcover)
kanton_St <- st_as_sf(kanton)

# Intersect points (Flickr images) with limits of parks
Flickr_park <- st_intersection(parks_St, df_map_St)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
# Intersect points (Flickr images) with land cover.
Flickr_park <- st_intersection(landcover_St, Flickr_park)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
# Include data of canton in the points data
Flickr_park <- st_intersection(kanton_St, Flickr_park)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Flickr_Parks_map.png")       

ggplot() + 
  geom_sf(data = kanton_St, fill = "grey", color = "white") +
  geom_sf(data = Flickr_park, fill = NA, color = "yellow", size=0.005) +
  geom_sf(data = parks_St, fill= NA, color = "dark green", size= 1) +
  ggtitle("Flickr pictures in Swiss National Parks") + 
  theme(plot.title = element_text(hjust = 0.5))+
  coord_sf()

rm(trimmed)
rm(df)
rm(t)
rm(df_map)
rm(df_map_data)
rm(df_map_coords)
rm(df_map_St)
rm(landcover)
rm(parks)
rm(kanton)

1.2.2 DATA TABLE EXPORTATION

flickr <- Flickr_park[c(39, 40, 41, 19, 23, 28, 38, 43, 44, 45, 47)]

# rename
names(flickr)[names(flickr) == "post_guid"] <- "photo_id"
names(flickr)[names(flickr) == "user_guid"] <- "USER"
names(flickr)[names(flickr) == "post_create_date"] <- "DATE"
names(flickr)[names(flickr) == "gridcode"] <- "USE"
names(flickr)[names(flickr) == "NAME"] <- "CANTON"
names(flickr)[names(flickr) == "SHAPE_Area"] <- "km2"
names(flickr)[names(flickr) == "tags"] <- "TAGS"
names(flickr)[names(flickr) == "Name"] <- "PARK"
names(flickr)[names(flickr) == "post_title"] <- "TITLE"
names(flickr)[names(flickr) == "post_body"] <- "BODY"

flickr <- flickr %>%
    mutate(lng = unlist(map(flickr$geometry,1)),
           lat = unlist(map(flickr$geometry,2)))
flickr <- st_drop_geometry(flickr)

#General statistics of each column
summary(flickr$photo_id)
##    Length     Class      Mode 
##     77694 character character
#The library dplyr provide several notifications/observations, so we are going to avoid to write in the report.
options(dplyr.summarise.inform = FALSE)

1.2.3 FILTERING THE LAND COVER

Only pictures that are in categories of land cover that are natural

# Using the table obtained before, we create another subset avoiding the pictures in the land cover 100, 120, 140, 160
# The pictures that are in land cover related to natural categories were considered
flickr_nat<- filter(flickr, USE!=100, USE!=120, USE!=140, USE!=160) # The expression USE! means avoid/reduce

#General statistics of each column
summary(flickr_nat$photo_id)
##    Length     Class      Mode 
##     56804 character character

2. GENERAL ANALYSIS OF PARKS

# Application of the filter of the use, only natural land cover
  
db <- flickr_nat #We create a copy for future use

ddbb <- db # We create another copy for future use

a. Number of visitors each Swiss National Parks, according the land cover,

NA values refers to users or land cover category that doesn’t exist in the park

#Filtering with a dynamic table to identify the parks, user, and the land cover.
landcover <- flickr%>%
  group_by(PARK, USE, USER)%>%
  summarise(land_picture = n()) #Total number of picture in a land cover, per park, taken by a user.

#Counting the number of users per park in each land cover
landcover <- landcover %>%
  group_by(PARK, USE) %>%
  summarise(park_landcover = n()) #Total number of user who took pictures in a land cover per park.

#Pivot table that create a matrix of park (rows) and land cover (columns)
lu<- pivot_wider(landcover, names_from = USE, values_from = park_landcover)
landcover_park <- as.data.frame(lu) # We transform the matrix into a dataframe 

#Rename of the codes (numbers) with the names of the days
    #Land cover categories that are reduced
names(landcover_park)[names(landcover_park)=="100"] <- "Building area"
names(landcover_park)[names(landcover_park)=="120"] <- "Traffic and transportation surface"
names(landcover_park)[names(landcover_park)=="140"] <- "Special settlement areas"
names(landcover_park)[names(landcover_park)=="160"] <- "Recreation and green spaces"

    #Land cover categories that are considered for the analysis
names(landcover_park)[names(landcover_park)=="200"] <- "Fruit growing, viticulture, horticulture"
names(landcover_park)[names(landcover_park)=="220"] <- "Arable and Forage Cultivation"
names(landcover_park)[names(landcover_park)=="240"] <- "Alpine farming"
names(landcover_park)[names(landcover_park)=="300"] <- "Forest"
names(landcover_park)[names(landcover_park)=="400"] <- "Lakes and rivers"
names(landcover_park)[names(landcover_park)=="420"] <- "Unproductives"

#Organizing the table by alphabetical order of Parks names
landcover_park <- landcover_park %>% arrange(PARK)
#Exporting the table to csv in the root folder
#write.csv(landcover_park,"~/GitHub/Flickr_SwissParks/Results/LandCover.csv", row.names = FALSE)

#Displaying  the table with a better format
kbl(landcover_park)%>%
  kable_styling(bootstrap_options = "striped", full_width = T, position = "left", latex_options = c("striped", "repeat_header"))
PARK Building area Traffic and transportation surface Special settlement areas Recreation and green spaces Arable and Forage Cultivation Alpine farming Forest Lakes and rivers Unproductives Fruit growing, viticulture, horticulture
Biosfera Val Müstair 53 28 1 6 69 79 77 23 64 NA
Jurapark Aargau 124 64 10 17 166 NA 102 43 1 25
Landschaftspark Binntal 15 10 1 2 52 14 42 17 39 NA
Naturpark Beverin 42 52 6 15 127 85 163 78 102 2
Naturpark Diemtigtal 7 8 NA NA 21 41 29 10 19 NA
Naturpark Gantrisch 66 48 3 9 195 82 139 89 26 9
Naturpark Pfyn-Finges 113 39 18 24 105 104 80 45 158 38
Naturpark Thal 29 22 2 1 57 50 87 2 3 6
Parc du Doubs 93 73 4 8 127 75 162 78 28 4
Parc Ela 149 89 16 29 278 158 307 84 162 9
Parc Jura vaudois 173 103 14 36 191 214 280 141 37 13
Parc naturel périurbain du Jorat NA 2 1 1 15 NA 38 3 NA NA
Parc naturel régional de la Vallée du Trient 58 45 42 4 67 81 223 61 220 15
Parc naturel régional Gruyère Pays-d’Enhaut 297 190 18 299 636 429 396 103 176 9
Parc régional Chasseral 118 81 13 20 156 172 152 5 5 30
Parco Val Calanca 7 4 2 NA 10 11 28 6 11 NA
Regionaler Naturpark Schaffhausen 22 16 3 3 56 1 39 10 1 12
Schweizerischer Nationalpark NA 1 NA NA NA 1 85 8 60 NA
UNESCO Biosphäre Entlebuch 25 9 1 4 75 74 68 10 34 1
Wildnispark Zürich Sihlwald 1 7 NA NA NA NA 50 5 NA NA
rm(lu)
rm(landcover)

b. Area (km2) of national parks

#Filtering in a dynamic table to extract Park and square area
#We need this step because some parks have several polygons (with different square area)
#So the extracted information of square area is provided by the surface of each polygon
#where the points (Flickr pictures) where located.
area_portions <- ddbb %>%
  group_by(PARK, km2) %>%
  summarise(photos_km = n())

  #Summarizing the total area of the polygons that compose the parks where the points where located.
area_park <- area_portions %>%
  group_by(PARK) %>%
  summarise(km = round(sum(km2),2)) #Round for having just two decimals, and sum for adding all polygons surface 

# Obtaining the total area (km2) covered by all parks:
area_park %>%
  summarize(area = sum(km))#km2
## # A tibble: 1 x 1
##    area
##   <dbl>
## 1 5848.
rm(area_portions)

c. Count of pictures in the parks

#Filtering in a dynamic table to extract the number of pictures per Park
photos <- ddbb %>%
  group_by(PARK) %>%
  summarise(photo = n()) %>% #Total number of pictures per park
  mutate(ratio_photos= round((photo / sum(photo))*100, 2)) %>% #Percentage with 2 decimal of pictures 
  arrange(desc(ratio_photos)) #Sort the table per percentage of pictures

#Filtering in a dynamic table to extract Park and square area#The total number of pictures is:
ddbb %>%
  summarize(count =  n()) #pictures
##   count
## 1 56804

d. Count of Users in the parks

#Filtering in a dynamic table to extract the number of pictures per Park and users
users_flickr <- ddbb %>%
  group_by(PARK, USER) %>%
  summarise(user_photos =  n())

#From the previous filter, we can count the number of users per park
users_flickr <- users_flickr %>%
  group_by(PARK) %>%
  summarise(user = n())%>%
  mutate(ratio_users= round((user / sum(user))*100, 2)) %>% 
  arrange(desc(ratio_users))

#The total summary about the number of users
users_flickr %>%
  summarize(users = sum(user))#users
## # A tibble: 1 x 1
##   users
##   <int>
## 1  5743

d. Statistic of area (km2), users and photos for each Swiss National Parks: NOTE: THESE VALUES ONLY CONSIDER THE NATURAL LAND COVER (the dismissed categories are: Building area”, “Traffic and transportation surface”, “Special settlement areas”, “Recreation and green spaces”

general <- data.frame("Parks"= area_park$PARK, "Area(km2)"= area_park$km, "#Flickr"= photos$photo, "%Flickr"= photos$ratio_photos, "Flickr/km2"=round((photos$photo/area_park$km),2),"#Users"=users_flickr$user, "%Users"=users_flickr$ratio_users, "Users/km2"=round((users_flickr$user/area_park$km),2), check.names=FALSE )

general <- general %>% arrange(Parks)

kbl(general) %>%
  kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
Parks Area(km2) #Flickr %Flickr Flickr/km2 #Users %Users Users/km2
Biosfera Val Müstair 198.64 13327 23.46 67.09 1135 19.76 5.71
Jurapark Aargau 299.05 6199 10.91 20.73 643 11.20 2.15
Landschaftspark Binntal 164.78 6052 10.65 36.73 529 9.21 3.21
Naturpark Beverin 515.11 4345 7.65 8.44 453 7.89 0.88
Naturpark Diemtigtal 135.50 4251 7.48 31.37 363 6.32 2.68
Naturpark Gantrisch 405.68 3015 5.31 7.43 357 6.22 0.88
Naturpark Pfyn-Finges 276.51 2992 5.27 10.82 356 6.20 1.29
Naturpark Thal 139.39 2883 5.08 20.68 338 5.89 2.42
Parc du Doubs 293.65 2786 4.90 9.49 305 5.31 1.04
Parc Ela 657.82 1768 3.11 2.69 239 4.16 0.36
Parc Jura vaudois 530.62 1715 3.02 3.23 198 3.45 0.37
Parc naturel périurbain du Jorat 9.38 1529 2.69 163.01 172 2.99 18.34
Parc naturel régional de la Vallée du Trient 206.92 1314 2.31 6.35 129 2.25 0.62
Parc naturel régional Gruyère Pays-d’Enhaut 632.10 1290 2.27 2.04 120 2.09 0.19
Parc régional Chasseral 473.30 1140 2.01 2.41 107 1.86 0.23
Parco Val Calanca 120.49 759 1.34 6.30 86 1.50 0.71
Regionaler Naturpark Schaffhausen 213.03 587 1.03 2.76 71 1.24 0.33
Schweizerischer Nationalpark 170.33 364 0.64 2.14 51 0.89 0.30
UNESCO Biosphäre Entlebuch 394.49 326 0.57 0.83 46 0.80 0.12
Wildnispark Zürich Sihlwald 10.95 162 0.29 14.79 45 0.78 4.11
#write.csv(general,"~/GitHub/Flickr_SwissParks/Results/General.csv", row.names = FALSE)

3. TEMPORAL ANALYSIS OF PARKS (Using users’ visits)

#Pictures taken by user
contrib <- ddbb %>%
  group_by(PARK, USER)%>%
  summarise(photo = n())
summary(contrib$photo)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    1.000    1.000    2.000    9.891    6.000 1629.000
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Picture_Park_Taken.png")       

ggplot(contrib, aes(x=log(photo))) + 
  geom_histogram(aes(y=(..count..)/sum(..count..)),
                 binwidth = .1,
                 colour="black", fill="white")+
  facet_wrap(vars(PARK), labeller = label_wrap_gen(width=24))+
  theme_bw()+
  theme(axis.text.x = element_text(colour = "grey20", size = 6, angle = 90, hjust = 0.5, vjust = 0.5),
          axis.text.y = element_text(colour = "grey20", size = 6),
          strip.text = element_text(face = "italic"),
          text = element_text(size = 10))+
  ggtitle('Distribution of percentage of pictures taken by users in the Parks')

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Trend_Pictures.png")       

x <-contrib$photo
y <- x + rnorm(2500, 0, 50)   
## Warning in x + rnorm(2500, 0, 50): longer object length is not a multiple of
## shorter object length
qqplot(x, y)

3.1 YEARLY VISITOR IN THE PARKS

ddbb$DATE <- as.Date(ddbb$DATE)
ddbb$YEAR <- format(ddbb$DATE, format="%Y")

#Calculation of statistics per year
year_photo <- ddbb %>%
  group_by(PARK, YEAR, USER)%>%
  summarise(photo_yearly = n())

yearly_counts <- year_photo %>%
  group_by(PARK, YEAR)%>%
  summarise(VISITS = n())

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/general_trend.png")       

ggplot(data = yearly_counts, aes(x = YEAR, y = VISITS, group = 1)) +
    geom_line() +
    facet_wrap(vars(PARK), labeller = label_wrap_gen(width=24))+
    theme_bw()+
    theme(axis.text.x = element_text(colour = "grey20", size = 6, angle = 90, hjust = 0.5, vjust = 0.5),
          axis.text.y = element_text(colour = "grey20", size = 6),
          strip.text = element_text(face = "italic"),
          text = element_text(size = 10))+
  ggtitle('Annual distribution of users per Park')

rm(year_photo)

3.2 PARK USERS PER SEASON

Number of users per season in each Swiss National Parks

#create dates variable for your column that contains dates 
dates <- ddbb$DATE
#get the month of the date, create new column called month
ddbb$month<-(month(dates, label=TRUE))

ddbb$SEASON <- ifelse(ddbb$month %in% c('May','Jun','Jul'), "Summer",
                    ifelse (ddbb$month %in% c('Aug','Sep','Oct'), "Autumn",
                            ifelse (ddbb$month %in% c('Nove','Dec','Jan'), 
                                    "Winter", "Spring")))
#Calculation of pictures per user in seasons

season <- ddbb %>%
  group_by(PARK, SEASON, USER)%>%
  summarise(picture_season = n())

#From the previous table, we count the number of users per park in each season

f <- c("Summer", "Spring", "Winter", "Autumn")
color_list <- c("#ffbf00", "#00b04f","#00b0f0", "#ed7c31")
col_order <- c("PARK", "Autumn", "Winter", "Spring", "Summer")

season_users <- season %>%
  group_by(PARK, SEASON) %>%
  summarise(season_users = n())%>%
  mutate(SEASON =  factor(SEASON, levels = f))%>%
  arrange(SEASON)  

season_park<- pivot_wider(season_users, names_from = SEASON, values_from = season_users)
season_park <- season_park[, col_order]
season_park <- season_park %>% arrange(PARK)

#write.csv(season_park,"//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Season_Park.csv", row.names = FALSE)

  #Display of the table with a better format
kbl(season_park)%>%
  kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
PARK Autumn Winter Spring Summer
Biosfera Val Müstair 97 20 21 73
Jurapark Aargau 91 48 97 100
Landschaftspark Binntal 51 17 16 35
Naturpark Beverin 144 52 67 141
Naturpark Diemtigtal 31 16 22 20
Naturpark Gantrisch 146 102 138 124
Naturpark Pfyn-Finges 140 55 91 115
Naturpark Thal 53 28 49 43
Parc du Doubs 120 54 96 119
Parc Ela 271 121 158 215
Parc Jura vaudois 216 156 199 193
Parc naturel périurbain du Jorat 14 12 20 13
Parc naturel régional de la Vallée du Trient 213 44 85 178
Parc naturel régional Gruyère Pays-d’Enhaut 458 276 281 418
Parc régional Chasseral 144 94 122 138
Parco Val Calanca 21 3 15 17
Regionaler Naturpark Schaffhausen 39 20 25 34
Schweizerischer Nationalpark 72 11 9 35
UNESCO Biosphäre Entlebuch 63 36 52 53
Wildnispark Zürich Sihlwald 12 11 23 16
  #Stacked normalized horizontal bar graph
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/season_park.png") 

ggplot(season_users, #my data
       aes(x = PARK, y = season_users, fill = SEASON, group = SEASON)) + 
         geom_bar(position = "fill", #Creates stacked bars with 100% of proportion
                  stat="identity", 
                  color='white', 
                  width = 0.8)+
  scale_y_continuous(labels = scales::percent)+ # Change the name of the labels into percentages
  scale_fill_manual(values=color_list)+
  guides(fill = guide_legend(reverse = TRUE))+ #Change the order of the names in the label
  theme(plot.title = element_text(hjust = 0.5, face = 'bold'),
        legend.position = 'bottom',
        axis.text.x = element_text(angle = 90, vjust = 0.15, hjust=0.15, size = 9),
        axis.text.y = element_text(size = 9),
        strip.background = element_rect(fill = "#17252D", color = "#17252D"),
        strip.text = element_text(size = rel(1), face = "bold", color = "white", margin = margin(5,0,5,0)))+
  scale_x_discrete(labels = function(x) str_wrap(x, width = 50))+
  ggtitle("Proportion of visitors per Season")+ 
  labs(x ="Parks", y = "Visitors")+ 
  ggplot2::coord_flip() #We change the axis to fit into horizontal bars

rm(season)
rm(season_users)

3.3 VISITS PER DAY OF THE WEEK

Number of visitors per day in each Swiss National Parks

ddbb$DAY <- weekdays(as.Date(ddbb$DATE))
ddbb$DAY <- factor(ddbb$DAY, levels = c("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"))

photoUser_day <- ddbb %>%
  group_by(PARK, DAY, USER)%>%
  summarise(photo_day = n())

  #Counting the number of users per park in each season
daily <- photoUser_day %>%
  group_by(PARK, DAY) %>%
  summarise(visit_day = n())

sd<- pivot_wider(daily, names_from = DAY, values_from = visit_day)

daily_park <- as.data.frame(sd)
daily_park <- daily_park %>% arrange(PARK)

#Display of the table with a better format
kbl(daily_park)%>%
  kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
PARK Monday Tuesday Wednesday Thursday Friday Saturday Sunday
Biosfera Val Müstair 35 37 35 31 33 61 43
Jurapark Aargau 55 40 50 42 52 66 86
Landschaftspark Binntal 23 21 17 17 24 25 33
Naturpark Beverin 54 49 49 66 63 102 109
Naturpark Diemtigtal 10 9 12 10 16 28 27
Naturpark Gantrisch 57 63 68 73 81 123 139
Naturpark Pfyn-Finges 65 60 47 55 77 90 109
Naturpark Thal 23 16 20 26 16 39 54
Parc du Doubs 41 38 47 42 59 100 110
Parc Ela 120 127 115 119 137 174 184
Parc Jura vaudois 88 88 89 84 126 199 222
Parc naturel périurbain du Jorat 10 6 6 2 10 8 18
Parc naturel régional de la Vallée du Trient 73 75 67 79 77 115 131
Parc naturel régional Gruyère Pays-d’Enhaut 182 160 183 187 218 390 376
Parc régional Chasseral 65 64 62 60 85 113 151
Parco Val Calanca 10 10 9 7 9 21 12
Regionaler Naturpark Schaffhausen 14 12 19 9 15 33 33
Schweizerischer Nationalpark 18 20 17 21 18 26 23
UNESCO Biosphäre Entlebuch 38 24 27 34 35 46 56
Wildnispark Zürich Sihlwald 6 2 11 4 3 14 23
#write.csv(daily_park,"~/GitHub/Flickr_SwissParks/Results/Day-Visits_Park.csv", row.names = FALSE)

Bar plot of visitors per day in the Parks

#Horizontal bar chart of daily number of visitors per Park
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/daily_park.png")   #Export the graph as *.png

ggplot(daily, #my data
       aes(x = PARK, y = visit_day, fill = DAY, group = DAY)) +
         geom_bar(position = position_dodge(), 
                  stat="identity", 
                  color='white', 
                  width = 0.8)+
         scale_fill_manual(values=c("#2980b9", "#5dade2", "#76d7c4","#2ecc71", "#27ae60", "#f39c12","#d35400"))+
  theme(plot.title = element_text(hjust = 0.5, face = 'bold'),
        legend.position = 'bottom',
        axis.text.x = element_text(angle = 90, vjust = 0.15, hjust=0.15, size = 9),
        axis.text.y = element_text(size = 9),
        strip.background = element_rect(fill = "#17252D", color = "#17252D"),
        strip.text = element_text(size = rel(0.5), face = "bold", color = "white", margin = margin(5,0,5,0)))+
  scale_x_discrete(labels = function(x) str_wrap(x, width = 24))+
  xlab('Swiss National Parks') + ylab('Number of visitors') +
  ggtitle('Daily number of visitors')

Mosaic plot of visitors per day in the Parks (Two test)

  #GRAPH OF MOSAIC PLOT OF PARKS AND DAYS
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/daily_park_Mosaic1.png")       

ggplot(data = photoUser_day) +
  geom_mosaic(aes(x=product(DAY, PARK), fill = DAY), offset = 0.02) +
  scale_fill_manual(values=c("#27ae60","#2ecc71","#76d7c4", "#5dade2", "#2980b9", "#f39c12","#d35400"))+
  labs(y = "DAYS", title=" Mosaic plot of visits per Day in Parks ") +
  theme(legend.position = "none",
        axis.text.y=element_text(size = 10),
        axis.ticks.y=element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 10)
        )
## Warning: `unite_()` was deprecated in tidyr 1.2.0.
## Please use `unite()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

#POINT GRAPH OF FREQUENCY OF VISIT IN PARKS PER DAY
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/daily_park_Mosaic2.png")       

ggplot(daily, aes(x = PARK, y = visit_day, color = DAY)) +
  geom_point() +
  labs(x = "Days", y = "Visits") +
  scale_color_manual(name = "Days:", values=c("#27ae60","#2ecc71","#76d7c4", "#5dade2", "#2980b9", "#f39c12","#d35400")) +
  theme(legend.title = element_text(size = 14, face = 2),
        legend.position = 'bottom',
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 8))+
  ggtitle('Daily total number of visitors in Parks')

4. USERS PROFILES

Analysis of users according the canton origin, visited park, and time span between the first and the last picture

4.1. ANALYSIS OF USERS BY CANTON

canton<- ddbb %>%
  group_by(CANTON, USER)%>%
  summarise(canton_user = n())

co<- pivot_wider(canton, names_from = CANTON, values_from = canton_user)
canton_users <- as.data.frame(co)
canton_users[is.na(canton_users)] = 0
canton_users <- canton_users %>% mutate_if(is.numeric, ~1 * (. > 0))
canton_users$Total_Canton = rowSums(canton_users[,c(2:13)])
canton_users <- canton_users[order(canton_users$Total_Canton, decreasing = FALSE),]

#write.csv(canton_users,"~/GitHub/Flickr_SwissParks/Results/Canton_Users.csv", row.names = FALSE)

ggplot(canton_users, aes(x=Total_Canton))+
  geom_histogram(binwidth=1 ,colour="black", fill="white")+
  geom_vline(aes(xintercept=mean(Total_Canton)), color="blue", linetype="dashed", size=1)+
  labs(title="Histogram of Cantons by users ",x="Number of visited Cantons", y = "Count of users")+
  scale_x_continuous(breaks = seq(0, 12, 1))+
  scale_y_continuous(breaks = seq(0, 6000, 200))

#Heatmap

coul <- colorRampPalette(c("beige", "green"))(5)

rownames(canton_users) <- canton_users[,1]
canton_users[,1]<- NULL
canton_users[,13]<- NULL
data <-as.matrix(canton_users)

heatmap(data, Colv = NA, Rowv = NA, scale = "column", col = coul, main = "Heatmap of User per Canton")

rm(canton)
rm(co)
rm(coul)

4.2. ANALYSIS OF USERS BY PARK

park <- ddbb %>%
  group_by(PARK, USER)%>%
  summarise(park_user = n())

po<- pivot_wider(park, names_from = PARK, values_from = park_user)
park_users <- as.data.frame(po)
park_users[is.na(park_users)] = 0
park_users <- park_users %>% mutate_if(is.numeric, ~1 * (. > 0))
park_users$Total_Park = rowSums(park_users[,c(2:21)])
park_users <- park_users[order(park_users$Total_Park, decreasing = FALSE),]

#write.csv(park_users,"~/GitHub/Flickr_SwissParks/Results/Park_Users.csv", row.names = FALSE)

ggplot(park_users, aes(x=Total_Park))+
  geom_histogram(binwidth=1 ,colour="black", fill="white")+
  geom_vline(aes(xintercept=mean(Total_Park)), color="blue", linetype="dashed", size=1)+
  labs(title="Histogram of Parks by users ",x="Number of visited Parks", y = "Count of users")+
  scale_x_continuous(breaks = seq(0, 20, 1))+
  scale_y_continuous(breaks = seq(0, 6000, 200))

#Heatmap

coul <- colorRampPalette(c("beige", "blue"))(10)

rownames(park_users) <- park_users[,1]
park_users[,1]<- NULL
park_users[,21]<- NULL
park_users <-as.matrix(park_users)

heatmap(park_users, Colv = NA, Rowv = NA, scale = "column", col = coul, main = "Heatmap of User per Park")

rm(park)
rm(po)
rm(coul)

4.3. ANALYSIS OF USERS TIME SPEND

user <- ddbb %>%
  group_by(USER)%>%
  summarise(photos = n())

user <- as.data.frame(user)

dt <- ddbb %>%
  group_by(USER, DATE)%>%
  summarise(photos = n())%>%
  mutate(DATE=as.Date(DATE))

dt1 <- dt %>%
    group_by(USER) %>%
    arrange(DATE) %>%
    slice(1L)

first <- as.data.frame(dt1)
names(first)[names(first)=="DATE"] <- "start"

dt2 <- dt %>%
    group_by(USER) %>%
     arrange(desc(DATE)) %>%
    slice(1L)

last <- as.data.frame(dt)
names(last)[names(last)=="DATE"] <- "end"

dt_final <- merge(last, first, by="USER")

dur <- dt_final %>%
  mutate(
    days = end - start,
    seconds = as.numeric(difftime(end, start)) / 365.25,
    years = round(interval(start, end) / years(1))
 )

time_user <-  merge(user, dur, by="USER", all=T)
time_user <- subset(time_user, select=-c(photos.x,photos.y))
time_user <- time_user[order(time_user$seconds, decreasing = TRUE),]

ggplot(time_user, aes(x=years))+
  geom_histogram(binwidth=1 ,colour="black", fill="white")+
  geom_vline(aes(xintercept=mean(years)), color="blue", linetype="dashed", size=1)+
  labs(title="Histogram of time between first and last picture by users ",x="time span in years", y = "Count of users")+
  scale_x_continuous(breaks = seq(0, 20, 1))+
  scale_y_continuous(breaks = seq(0, 6000, 500))

#write.csv(time_user,"~/GitHub/Flickr_SwissParks/Results/Time_Users.csv", row.names = FALSE)

rm(user)
rm(dt)
rm(dt1)
rm(dt2)
rm(last)
rm(dur)

5.SEMANTIC ANALYSIS

set.seed(42)

base <- data.frame(ddbb$photo_id, ddbb$USER, ddbb$PARK,  ddbb$TAGS)
names(base)[names(base) == "ddbb.PARK"] <- "doc_id"
names(base)[names(base) == "ddbb.TAGS"] <- "text"
names(base)[names(base) == "ddbb.USER"] <- "author"
names(base)[names(base) == "ddbb.photo_id"] <- "origin"

base <- base %>% 
  mutate(text = strsplit(as.character(text), ",")) %>%
  unnest(text)

base <- base[!duplicated(base),]

base$text <- gsub("[[:digit:]]", " ", base$text) # Remove numbers
base$text <- gsub("[][!#$%()*,.:;<=>@^_`|~.{}„¨“”´´°ã]", " ", base$text) # Remove extra symbols
base$text <- gsub("[[:punct:]]", " ", base$text, perl=TRUE) #remove punctuation
base$text <- gsub('\\b\\w{1,2}\\s','',base$text) #Remove words with less than 2 characters
base$text <- gsub('\\b\\w{6,}\\s','',base$text) #Remove words with more than 7 characters
base$text <- gsub("\\s+", " ", str_trim(base$text)) # Remove extra whitespaces

base_source=DataframeSource(base)
base_corpus=VCorpus(base_source)
#base_corpus_clean <- tm_map(base_corpus, removeWords, stopwords("english"))
#base_corpus_clean <- tm_map(base_corpus,stemDocument) #find to root of the words 

5.0 ANALYSIS: GENERAL

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Parks.png")       

wordcloud(base_corpus, 
          min.freq = 5,
          max.words = 50,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

#myStopwords <- setdiff(myStopwords, c("d", "e")) 
#text_corpus_clean <- tm_map(base_corpus_clean, removeWords, myStopwords)

5.1 ANALYSIS: Schweizerischer Nationalpark

#png(//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Nationalpark.png")       

sub_jurapark <- base%>%filter(doc_id=="Schweizerischer Nationalpark")
sub_jurapark=DataframeSource(sub_jurapark)
sub_jurapark=VCorpus(sub_jurapark)
sub_jurapark <- tm_map(sub_jurapark, removeWords, stopwords("german"))
#sub_jurapark <- tm_map(sub_jurapark,stemDocument) #find to root of the words 
jurapark <- wordcloud(sub_jurapark, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.2 ANALYSIS: Jurapark Aargau

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Jurapark.png")       

sub_jurapark <- base%>%filter(doc_id=="Jurapark Aargau")
sub_jurapark=DataframeSource(sub_jurapark)
sub_jurapark=VCorpus(sub_jurapark)
sub_jurapark <- tm_map(sub_jurapark, removeWords, stopwords("german"))
#sub_jurapark <- tm_map(sub_jurapark,stemDocument) #find to root of the words 
jurapark <- wordcloud(sub_jurapark, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.3 ANALYSIS: Parc naturel régional Gruyère Pays-d’Enhaut

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Gruyere.png")       

sub_gruyere <- base%>%filter(doc_id=="Parc naturel régional Gruyère Pays-d’Enhaut")
sub_gruyere=DataframeSource(sub_gruyere)
sub_gruyere=VCorpus(sub_gruyere)
sub_gruyere <- tm_map(sub_gruyere, removeWords, stopwords("french"))
#sub_gruyere <- tm_map(sub_gruyere,stemDocument) #find to root of the words 
gruyere <- wordcloud(sub_gruyere, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))
## Warning in wordcloud(sub_gruyere, min.freq = 5, max.words = 100, random.order =
## FALSE, : wwwbernergartenphilosophcomcheuferuegenchschweizorganicspiriteu could
## not be fit on page. It will not be plotted.

5.4 ANALYSIS: Biosfera Val Müstair

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Mustair.png")       

sub_mustair <- base%>%filter(doc_id=="Biosfera Val Müstair")
sub_mustair=DataframeSource(sub_mustair)
sub_mustair=VCorpus(sub_mustair)
sub_mustair <- tm_map(sub_mustair, removeWords, stopwords("german"))
#sub_mustair <- tm_map(sub_mustair,stemDocument) #find to root of the words 
mustair <- wordcloud(sub_mustair, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.5 ANALYSIS: Wildnispark Zürich Sihlwald

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Sihlwald.png")       

sub_sihlwald <- base%>%filter(doc_id=="Wildnispark Zürich Sihlwald")
sub_sihlwald=DataframeSource(sub_sihlwald)
sub_sihlwald=VCorpus(sub_sihlwald)
sub_sihlwald <- tm_map(sub_sihlwald, removeWords, stopwords("german"))
#sub_sihlwald <- tm_map(sub_sihlwald,stemDocument) #find to root of the words 
sihlwald <- wordcloud(sub_sihlwald, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.6 ANALYSIS: Parc régional Chasseral

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Chasseral.png")       

sub_chasseral <- base%>%filter(doc_id=="Parc régional Chasseral")
sub_chasseral=DataframeSource(sub_chasseral)
sub_chasseral=VCorpus(sub_chasseral)
sub_chasseral <- tm_map(sub_chasseral, removeWords, stopwords("french"))
#sub_chasseral <- tm_map(sub_chasseral,stemDocument) #find to root of the words 
chasseral <- wordcloud(sub_chasseral, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.7 ANALYSIS: Parc Jura vaudois

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Vaudois.png")       

sub_vaudois <- base%>%filter(doc_id=="Parc Jura vaudois")
sub_vaudois=DataframeSource(sub_vaudois)
sub_vaudois=VCorpus(sub_vaudois)
sub_vaudois <- tm_map(sub_vaudois, removeWords, stopwords("french"))
#sub_vaudois <- tm_map(sub_vaudois,stemDocument) #find to root of the words 
vaudois <- wordcloud(sub_vaudois, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.8 ANALYSIS: Parc Ela

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Ela.png")       

sub_ela <- base%>%filter(doc_id=="Parc Ela")
sub_ela=DataframeSource(sub_ela)
sub_ela=VCorpus(sub_ela)
sub_ela <- tm_map(sub_ela, removeWords, stopwords("german"))
#sub_ela <- tm_map(sub_ela,stemDocument) #find to root of the words 
ela <- wordcloud(sub_ela, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.9 ANALYSIS: Naturpark Gantrisch

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Gantrisch.png")       

sub_gantrisch <- base%>%filter(doc_id=="Naturpark Gantrisch")
sub_gantrisch=DataframeSource(sub_gantrisch)
sub_gantrisch=VCorpus(sub_gantrisch)
sub_gantrisch <- tm_map(sub_gantrisch, removeWords, stopwords("german"))
#sub_gantrisch <- tm_map(sub_gantrisch,stemDocument) #find to root of the words 
gantrisch <- wordcloud(sub_gantrisch, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.10 ANALYSIS: Naturpark Beverin

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Beverin.png")       

sub_beverin <- base%>%filter(doc_id=="Naturpark Beverin")
sub_beverin=DataframeSource(sub_beverin)
sub_beverin=VCorpus(sub_beverin)
sub_beverin <- tm_map(sub_beverin, removeWords, stopwords("german"))
#sub_beverin <- tm_map(sub_beverin,stemDocument) #find to root of the words 
beverin <- wordcloud(sub_beverin, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.11 ANALYSIS: Regionaler Naturpark Schaffhausen

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Schaffhausen.png")       

sub_schaffhausen <- base%>%filter(doc_id=="Regionaler Naturpark Schaffhausen")
sub_schaffhausen=DataframeSource(sub_schaffhausen)
sub_schaffhausen=VCorpus(sub_schaffhausen)
sub_schaffhausen <- tm_map(sub_schaffhausen, removeWords, stopwords("german"))
#sub_schaffhausen <- tm_map(sub_schaffhausen,stemDocument) #find to root of the words 
schaffhausen <- wordcloud(sub_schaffhausen, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.12 ANALYSIS: Parc naturel régional de la Vallée du Trient

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Trient.png")       

sub_trient <- base%>%filter(doc_id=="Parc naturel régional de la Vallée du Trient")
sub_trient=DataframeSource(sub_trient)
sub_trient=VCorpus(sub_trient)
sub_trient <- tm_map(sub_trient, removeWords, stopwords("french"))
#sub_trient <- tm_map(sub_trient,stemDocument) #find to root of the words 
trient <- wordcloud(sub_trient, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.13 ANALYSIS: Parc du Doubs

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Doubs.png")       

sub_doubs <- base%>%filter(doc_id=="Parc du Doubs")
sub_doubs=DataframeSource(sub_doubs)
sub_doubs=VCorpus(sub_doubs)
sub_doubs <- tm_map(sub_doubs, removeWords, stopwords("french"))
#sub_doubs <- tm_map(sub_doubs,stemDocument) #find to root of the words 
doubs <- wordcloud(sub_doubs, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.14 ANALYSIS: Naturpark Thal

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Thal.png")       

sub_thal <- base%>%filter(doc_id=="Naturpark Thal")
sub_thal=DataframeSource(sub_thal)
sub_thal=VCorpus(sub_thal)
sub_thal <- tm_map(sub_thal, removeWords, stopwords("german"))
#sub_thal <- tm_map(sub_thal,stemDocument) #find to root of the words 
thal <- wordcloud(sub_thal, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.15 ANALYSIS: Naturpark Pfyn-Finges

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Pfyn-Finges.png")       

sub_pfyn_finges <- base%>%filter(doc_id=="Naturpark Pfyn-Finges")
sub_pfyn_finges=DataframeSource(sub_pfyn_finges)
sub_pfyn_finges=VCorpus(sub_pfyn_finges)
sub_pfyn_finges <- tm_map(sub_pfyn_finges, removeWords, stopwords("german"))
#sub_pfyn_finges <- tm_map(sub_pfyn_finges,stemDocument) #find to root of the words 
pfyn_finges  <- wordcloud(sub_pfyn_finges, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.16 ANALYSIS: Naturpark Diemtigtal

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Diemigtal.png")       

sub_diemtigtal <- base%>%filter(doc_id=="Naturpark Diemtigtal")
sub_diemtigtal=DataframeSource(sub_diemtigtal)
sub_diemtigtal=VCorpus(sub_diemtigtal)
sub_diemtigtal <- tm_map(sub_diemtigtal, removeWords, stopwords("german"))
#sub_diemtigtal <- tm_map(sub_diemtigtal,stemDocument) #find to root of the words 
diemtigtal  <- wordcloud(sub_diemtigtal, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.17 ANALYSIS: UNESCO Biosphäre Entlebuch

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Entlebuch.png")       

sub_entlebuch <- base%>%filter(doc_id=="UNESCO Biosphäre Entlebuch")
sub_entlebuch=DataframeSource(sub_entlebuch)
sub_entlebuch=VCorpus(sub_entlebuch)
sub_entlebuch <- tm_map(sub_entlebuch, removeWords, stopwords("german"))
#sub_entlebuch <- tm_map(sub_entlebuch,stemDocument) #find to root of the words 
entlebuch  <- wordcloud(sub_entlebuch, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.18 ANALYSIS: Parco Val Calanca

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Calanca.png")       

sub_calanca<- base%>%filter(doc_id=="Parco Val Calanca")
sub_calanca=DataframeSource(sub_calanca)
sub_calanca=VCorpus(sub_calanca)
sub_calanca <- tm_map(sub_calanca, removeWords, stopwords("italian"))
#sub_calanca <- tm_map(sub_calanca,stemDocument) #find to root of the words 
calanca  <- wordcloud(sub_calanca, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.19 ANALYSIS: Landschaftspark Binntal

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Binntal.png")       

sub_binntal<- base%>%filter(doc_id=="Landschaftspark Binntal")
sub_binntal=DataframeSource(sub_binntal)
sub_binntal=VCorpus(sub_binntal)
sub_binntal <- tm_map(sub_binntal, removeWords, stopwords("french"))
#sub_binntal <- tm_map(sub_binntal,stemDocument) #find to root of the words 
binntal  <- wordcloud(sub_binntal, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

5.20 ANALYSIS: Parc naturel périurbain du Jorat

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Jorat.png")       

sub_jorat<- base%>%filter(doc_id=="Parc naturel périurbain du Jorat")
sub_jorat=DataframeSource(sub_jorat)
sub_jorat=VCorpus(sub_jorat)
sub_jorat <- tm_map(sub_jorat, removeWords, stopwords("french"))
#sub_jorat <- tm_map(sub_jorat,stemDocument) #find to root of the words 
jorat  <- wordcloud(sub_jorat, 
          min.freq = 5,
          max.words = 100,
          random.order = FALSE,
          random.color = FALSE,
          colors = brewer.pal(8, "Dark2"))

6. ANALYSIS OF CORRESPONDENCE

###6.1 Matrix with the terms frequencies by park

#Creation of the matrix of terms (words) by Park (Document)
dtm <- DocumentTermMatrix(base_corpus, control= list(wordLengths=c(3, 8))) #erase words with less than 3 character and over 8 characters
head(sort(slam::col_sums(dtm), decreasing = TRUE), n=20) #visualize the most 20 frequent words
##   swiss    alps    jura    snow    vaud   suiza schweiz   alpen   nikon     geo 
##    3107    2945    2907    2774    2452    2278    2011    1903    1754    1657 
##    ride    sony     sky   train     mmf   neige     mgn     car   river    bern 
##    1639    1512    1463    1436    1328    1267    1217    1066    1040    1016
#Distribution of terms in the total contentof the corpora
Heaps_plot(dtm, main="Heap' Law distribution of words") #Frequencies distribution of the number of distinct words"

## (Intercept)           x 
##   0.1670713   0.7092524
Zipf_plot(dtm, main="Zipf' Law distribution of words") # inverse relation of the rank (frequencies) distribution of words

## (Intercept)           x 
##    13.13721    -1.56131

###6.2 Constrution of the contingency table

#transform of the matrix to split the words by Park in a matrix
dtm_df <- reshape2::melt(as.matrix(dtm)) 
dtm_df <- dtm_df[dtm_df$value > 0, ] #Filtering to avoid noise, for keeping row (words) than have frequency >= 1
head(dtm_df,10)
##                                               Docs Terms value
## 9353                             Parc Jura vaudois  aabd     1
## 86203                                     Parc Ela   aad     1
## 107055 Parc naturel régional Gruyère Pays-d’Enhaut   aae     1
## 133172                             Jurapark Aargau   aar     1
## 133173                             Jurapark Aargau   aar     1
## 150896                         Naturpark Gantrisch   aar     1
## 157044                         Naturpark Gantrisch   aar     1
## 175635                             Jurapark Aargau   aar     1
## 177116                             Jurapark Aargau aarau     1
## 178076                             Jurapark Aargau aarau     1
#Redraw data and assemble it into a table to present in row the Parks and the words as columns, with frequencies in the values
contingency<- xtabs(~ Docs + Terms, data = dtm_df) #The occurrences is not considering the frequencies (Value) of the terms
contingency_matrix <- as.matrix(contingency)

###6.3 Constrution of the covariance matrix for the correlation between Parks

#Transformation of the matrix of frequencies into a square matrix per Park
#If both parks share a word once or several times, the correlation will be one,
#If both parks don't share the word the correlation will be zero

parks_congruency <- contingency_matrix %*% t(contingency_matrix) # square matrix
diag(parks_congruency) <- 0 #the relation between themselves is zero

parks_congruency_matrix <- as.data.frame.matrix(parks_congruency) #we transform into a matrix to melted into a edgelist
parks_congruency_matrix
##                                              Jurapark Aargau
## Jurapark Aargau                                            0
## Parc naturel régional Gruyère Pays-d’Enhaut           326989
## Parc Jura vaudois                                     288978
## Schweizerischer Nationalpark                           18934
## Parc Ela                                              253551
## Naturpark Gantrisch                                   193419
## Parc naturel régional de la Vallée du Trient           59763
## Naturpark Pfyn-Finges                                  95073
## Landschaftspark Binntal                                28349
## Naturpark Thal                                         76947
## Parc régional Chasseral                               262261
## UNESCO Biosphäre Entlebuch                             83924
## Biosfera Val Müstair                                   30395
## Parc du Doubs                                         150748
## Parco Val Calanca                                       4620
## Naturpark Beverin                                     218533
## Regionaler Naturpark Schaffhausen                      41397
## Naturpark Diemtigtal                                   25110
## Parc naturel périurbain du Jorat                        1760
## Wildnispark Zürich Sihlwald                            23972
##                                              Parc naturel régional Gruyère Pays-d’Enhaut
## Jurapark Aargau                                                                   326989
## Parc naturel régional Gruyère Pays-d’Enhaut                                            0
## Parc Jura vaudois                                                                2420075
## Schweizerischer Nationalpark                                                      162008
## Parc Ela                                                                         1271810
## Naturpark Gantrisch                                                              1473862
## Parc naturel régional de la Vallée du Trient                                      682515
## Naturpark Pfyn-Finges                                                             643884
## Landschaftspark Binntal                                                           308856
## Naturpark Thal                                                                    495715
## Parc régional Chasseral                                                          1495359
## UNESCO Biosphäre Entlebuch                                                        515567
## Biosfera Val Müstair                                                              255370
## Parc du Doubs                                                                     458241
## Parco Val Calanca                                                                  41065
## Naturpark Beverin                                                                 762728
## Regionaler Naturpark Schaffhausen                                                  76972
## Naturpark Diemtigtal                                                              273170
## Parc naturel périurbain du Jorat                                                   48872
## Wildnispark Zürich Sihlwald                                                        65943
##                                              Parc Jura vaudois
## Jurapark Aargau                                         288978
## Parc naturel régional Gruyère Pays-d’Enhaut            2420075
## Parc Jura vaudois                                            0
## Schweizerischer Nationalpark                             39417
## Parc Ela                                                444544
## Naturpark Gantrisch                                     839060
## Parc naturel régional de la Vallée du Trient            227796
## Naturpark Pfyn-Finges                                   234419
## Landschaftspark Binntal                                  90259
## Naturpark Thal                                          392078
## Parc régional Chasseral                                1642520
## UNESCO Biosphäre Entlebuch                              197003
## Biosfera Val Müstair                                     63518
## Parc du Doubs                                          1416625
## Parco Val Calanca                                        18907
## Naturpark Beverin                                       285674
## Regionaler Naturpark Schaffhausen                        21024
## Naturpark Diemtigtal                                     95135
## Parc naturel périurbain du Jorat                         36932
## Wildnispark Zürich Sihlwald                              16369
##                                              Schweizerischer Nationalpark
## Jurapark Aargau                                                     18934
## Parc naturel régional Gruyère Pays-d’Enhaut                        162008
## Parc Jura vaudois                                                   39417
## Schweizerischer Nationalpark                                            0
## Parc Ela                                                            89176
## Naturpark Gantrisch                                                112550
## Parc naturel régional de la Vallée du Trient                        60839
## Naturpark Pfyn-Finges                                               64007
## Landschaftspark Binntal                                             27270
## Naturpark Thal                                                      18062
## Parc régional Chasseral                                             70578
## UNESCO Biosphäre Entlebuch                                          47229
## Biosfera Val Müstair                                                35099
## Parc du Doubs                                                       23567
## Parco Val Calanca                                                    3453
## Naturpark Beverin                                                   68016
## Regionaler Naturpark Schaffhausen                                    2910
## Naturpark Diemtigtal                                                19143
## Parc naturel périurbain du Jorat                                     1052
## Wildnispark Zürich Sihlwald                                          3457
##                                              Parc Ela Naturpark Gantrisch
## Jurapark Aargau                                253551              193419
## Parc naturel régional Gruyère Pays-d’Enhaut   1271810             1473862
## Parc Jura vaudois                              444544              839060
## Schweizerischer Nationalpark                    89176              112550
## Parc Ela                                            0              777486
## Naturpark Gantrisch                            777486                   0
## Parc naturel régional de la Vallée du Trient   287668              357617
## Naturpark Pfyn-Finges                          390732              442900
## Landschaftspark Binntal                        159174              182659
## Naturpark Thal                                 131943              164252
## Parc régional Chasseral                        547948              858200
## UNESCO Biosphäre Entlebuch                     625873              436586
## Biosfera Val Müstair                           159876              265672
## Parc du Doubs                                  240144              202148
## Parco Val Calanca                               21987               24524
## Naturpark Beverin                             1088697              556623
## Regionaler Naturpark Schaffhausen               52305               46886
## Naturpark Diemtigtal                           133486              284986
## Parc naturel périurbain du Jorat                 7216                7090
## Wildnispark Zürich Sihlwald                     85746               45875
##                                              Parc naturel régional de la Vallée du Trient
## Jurapark Aargau                                                                     59763
## Parc naturel régional Gruyère Pays-d’Enhaut                                        682515
## Parc Jura vaudois                                                                  227796
## Schweizerischer Nationalpark                                                        60839
## Parc Ela                                                                           287668
## Naturpark Gantrisch                                                                357617
## Parc naturel régional de la Vallée du Trient                                            0
## Naturpark Pfyn-Finges                                                              260496
## Landschaftspark Binntal                                                            102975
## Naturpark Thal                                                                      47133
## Parc régional Chasseral                                                            329945
## UNESCO Biosphäre Entlebuch                                                         152317
## Biosfera Val Müstair                                                                96758
## Parc du Doubs                                                                       90928
## Parco Val Calanca                                                                   12772
## Naturpark Beverin                                                                  212721
## Regionaler Naturpark Schaffhausen                                                   10918
## Naturpark Diemtigtal                                                                67323
## Parc naturel périurbain du Jorat                                                     3337
## Wildnispark Zürich Sihlwald                                                          8968
##                                              Naturpark Pfyn-Finges
## Jurapark Aargau                                              95073
## Parc naturel régional Gruyère Pays-d’Enhaut                 643884
## Parc Jura vaudois                                           234419
## Schweizerischer Nationalpark                                 64007
## Parc Ela                                                    390732
## Naturpark Gantrisch                                         442900
## Parc naturel régional de la Vallée du Trient                260496
## Naturpark Pfyn-Finges                                            0
## Landschaftspark Binntal                                     112695
## Naturpark Thal                                               59265
## Parc régional Chasseral                                     344566
## UNESCO Biosphäre Entlebuch                                  196762
## Biosfera Val Müstair                                        132598
## Parc du Doubs                                                98447
## Parco Val Calanca                                            13041
## Naturpark Beverin                                           299442
## Regionaler Naturpark Schaffhausen                            21764
## Naturpark Diemtigtal                                         78609
## Parc naturel périurbain du Jorat                              6650
## Wildnispark Zürich Sihlwald                                  12010
##                                              Landschaftspark Binntal
## Jurapark Aargau                                                28349
## Parc naturel régional Gruyère Pays-d’Enhaut                   308856
## Parc Jura vaudois                                              90259
## Schweizerischer Nationalpark                                   27270
## Parc Ela                                                      159174
## Naturpark Gantrisch                                           182659
## Parc naturel régional de la Vallée du Trient                  102975
## Naturpark Pfyn-Finges                                         112695
## Landschaftspark Binntal                                            0
## Naturpark Thal                                                 14458
## Parc régional Chasseral                                       153556
## UNESCO Biosphäre Entlebuch                                     77915
## Biosfera Val Müstair                                           59712
## Parc du Doubs                                                  34545
## Parco Val Calanca                                               5346
## Naturpark Beverin                                             106925
## Regionaler Naturpark Schaffhausen                               3460
## Naturpark Diemtigtal                                           37848
## Parc naturel périurbain du Jorat                                1351
## Wildnispark Zürich Sihlwald                                    11642
##                                              Naturpark Thal
## Jurapark Aargau                                       76947
## Parc naturel régional Gruyère Pays-d’Enhaut          495715
## Parc Jura vaudois                                    392078
## Schweizerischer Nationalpark                          18062
## Parc Ela                                             131943
## Naturpark Gantrisch                                  164252
## Parc naturel régional de la Vallée du Trient          47133
## Naturpark Pfyn-Finges                                 59265
## Landschaftspark Binntal                               14458
## Naturpark Thal                                            0
## Parc régional Chasseral                              339056
## UNESCO Biosphäre Entlebuch                            95224
## Biosfera Val Müstair                                  17796
## Parc du Doubs                                        314262
## Parco Val Calanca                                      2250
## Naturpark Beverin                                    102866
## Regionaler Naturpark Schaffhausen                     12785
## Naturpark Diemtigtal                                  43534
## Parc naturel périurbain du Jorat                       2585
## Wildnispark Zürich Sihlwald                            7640
##                                              Parc régional Chasseral
## Jurapark Aargau                                               262261
## Parc naturel régional Gruyère Pays-d’Enhaut                  1495359
## Parc Jura vaudois                                            1642520
## Schweizerischer Nationalpark                                   70578
## Parc Ela                                                      547948
## Naturpark Gantrisch                                           858200
## Parc naturel régional de la Vallée du Trient                  329945
## Naturpark Pfyn-Finges                                         344566
## Landschaftspark Binntal                                       153556
## Naturpark Thal                                                339056
## Parc régional Chasseral                                            0
## UNESCO Biosphäre Entlebuch                                    257211
## Biosfera Val Müstair                                           89747
## Parc du Doubs                                                 804965
## Parco Val Calanca                                              23010
## Naturpark Beverin                                             373087
## Regionaler Naturpark Schaffhausen                              27501
## Naturpark Diemtigtal                                          223147
## Parc naturel périurbain du Jorat                               12679
## Wildnispark Zürich Sihlwald                                    20236
##                                              UNESCO Biosphäre Entlebuch
## Jurapark Aargau                                                   83924
## Parc naturel régional Gruyère Pays-d’Enhaut                      515567
## Parc Jura vaudois                                                197003
## Schweizerischer Nationalpark                                      47229
## Parc Ela                                                         625873
## Naturpark Gantrisch                                              436586
## Parc naturel régional de la Vallée du Trient                     152317
## Naturpark Pfyn-Finges                                            196762
## Landschaftspark Binntal                                           77915
## Naturpark Thal                                                    95224
## Parc régional Chasseral                                          257211
## UNESCO Biosphäre Entlebuch                                            0
## Biosfera Val Müstair                                             103838
## Parc du Doubs                                                     75043
## Parco Val Calanca                                                  9620
## Naturpark Beverin                                                545781
## Regionaler Naturpark Schaffhausen                                 15830
## Naturpark Diemtigtal                                              70317
## Parc naturel périurbain du Jorat                                   2484
## Wildnispark Zürich Sihlwald                                       42712
##                                              Biosfera Val Müstair Parc du Doubs
## Jurapark Aargau                                             30395        150748
## Parc naturel régional Gruyère Pays-d’Enhaut                255370        458241
## Parc Jura vaudois                                           63518       1416625
## Schweizerischer Nationalpark                                35099         23567
## Parc Ela                                                   159876        240144
## Naturpark Gantrisch                                        265672        202148
## Parc naturel régional de la Vallée du Trient                96758         90928
## Naturpark Pfyn-Finges                                      132598         98447
## Landschaftspark Binntal                                     59712         34545
## Naturpark Thal                                              17796        314262
## Parc régional Chasseral                                     89747        804965
## UNESCO Biosphäre Entlebuch                                 103838         75043
## Biosfera Val Müstair                                            0         30765
## Parc du Doubs                                               30765             0
## Parco Val Calanca                                            4367         12577
## Naturpark Beverin                                          112859        138885
## Regionaler Naturpark Schaffhausen                            7067         19628
## Naturpark Diemtigtal                                        62897         64488
## Parc naturel périurbain du Jorat                             1090          4417
## Wildnispark Zürich Sihlwald                                  3375         13977
##                                              Parco Val Calanca
## Jurapark Aargau                                           4620
## Parc naturel régional Gruyère Pays-d’Enhaut              41065
## Parc Jura vaudois                                        18907
## Schweizerischer Nationalpark                              3453
## Parc Ela                                                 21987
## Naturpark Gantrisch                                      24524
## Parc naturel régional de la Vallée du Trient             12772
## Naturpark Pfyn-Finges                                    13041
## Landschaftspark Binntal                                   5346
## Naturpark Thal                                            2250
## Parc régional Chasseral                                  23010
## UNESCO Biosphäre Entlebuch                                9620
## Biosfera Val Müstair                                      4367
## Parc du Doubs                                            12577
## Parco Val Calanca                                            0
## Naturpark Beverin                                        18073
## Regionaler Naturpark Schaffhausen                          586
## Naturpark Diemtigtal                                      4167
## Parc naturel périurbain du Jorat                           309
## Wildnispark Zürich Sihlwald                               1226
##                                              Naturpark Beverin
## Jurapark Aargau                                         218533
## Parc naturel régional Gruyère Pays-d’Enhaut             762728
## Parc Jura vaudois                                       285674
## Schweizerischer Nationalpark                             68016
## Parc Ela                                               1088697
## Naturpark Gantrisch                                     556623
## Parc naturel régional de la Vallée du Trient            212721
## Naturpark Pfyn-Finges                                   299442
## Landschaftspark Binntal                                 106925
## Naturpark Thal                                          102866
## Parc régional Chasseral                                 373087
## UNESCO Biosphäre Entlebuch                              545781
## Biosfera Val Müstair                                    112859
## Parc du Doubs                                           138885
## Parco Val Calanca                                        18073
## Naturpark Beverin                                            0
## Regionaler Naturpark Schaffhausen                        49211
## Naturpark Diemtigtal                                     79502
## Parc naturel périurbain du Jorat                          6150
## Wildnispark Zürich Sihlwald                              80323
##                                              Regionaler Naturpark Schaffhausen
## Jurapark Aargau                                                          41397
## Parc naturel régional Gruyère Pays-d’Enhaut                              76972
## Parc Jura vaudois                                                        21024
## Schweizerischer Nationalpark                                              2910
## Parc Ela                                                                 52305
## Naturpark Gantrisch                                                      46886
## Parc naturel régional de la Vallée du Trient                             10918
## Naturpark Pfyn-Finges                                                    21764
## Landschaftspark Binntal                                                   3460
## Naturpark Thal                                                           12785
## Parc régional Chasseral                                                  27501
## UNESCO Biosphäre Entlebuch                                               15830
## Biosfera Val Müstair                                                      7067
## Parc du Doubs                                                            19628
## Parco Val Calanca                                                          586
## Naturpark Beverin                                                        49211
## Regionaler Naturpark Schaffhausen                                            0
## Naturpark Diemtigtal                                                      3945
## Parc naturel périurbain du Jorat                                           348
## Wildnispark Zürich Sihlwald                                               4144
##                                              Naturpark Diemtigtal
## Jurapark Aargau                                             25110
## Parc naturel régional Gruyère Pays-d’Enhaut                273170
## Parc Jura vaudois                                           95135
## Schweizerischer Nationalpark                                19143
## Parc Ela                                                   133486
## Naturpark Gantrisch                                        284986
## Parc naturel régional de la Vallée du Trient                67323
## Naturpark Pfyn-Finges                                       78609
## Landschaftspark Binntal                                     37848
## Naturpark Thal                                              43534
## Parc régional Chasseral                                    223147
## UNESCO Biosphäre Entlebuch                                  70317
## Biosfera Val Müstair                                        62897
## Parc du Doubs                                               64488
## Parco Val Calanca                                            4167
## Naturpark Beverin                                           79502
## Regionaler Naturpark Schaffhausen                            3945
## Naturpark Diemtigtal                                            0
## Parc naturel périurbain du Jorat                             1055
## Wildnispark Zürich Sihlwald                                  5295
##                                              Parc naturel périurbain du Jorat
## Jurapark Aargau                                                          1760
## Parc naturel régional Gruyère Pays-d’Enhaut                             48872
## Parc Jura vaudois                                                       36932
## Schweizerischer Nationalpark                                             1052
## Parc Ela                                                                 7216
## Naturpark Gantrisch                                                      7090
## Parc naturel régional de la Vallée du Trient                             3337
## Naturpark Pfyn-Finges                                                    6650
## Landschaftspark Binntal                                                  1351
## Naturpark Thal                                                           2585
## Parc régional Chasseral                                                 12679
## UNESCO Biosphäre Entlebuch                                               2484
## Biosfera Val Müstair                                                     1090
## Parc du Doubs                                                            4417
## Parco Val Calanca                                                         309
## Naturpark Beverin                                                        6150
## Regionaler Naturpark Schaffhausen                                         348
## Naturpark Diemtigtal                                                     1055
## Parc naturel périurbain du Jorat                                            0
## Wildnispark Zürich Sihlwald                                               292
##                                              Wildnispark Zürich Sihlwald
## Jurapark Aargau                                                    23972
## Parc naturel régional Gruyère Pays-d’Enhaut                        65943
## Parc Jura vaudois                                                  16369
## Schweizerischer Nationalpark                                        3457
## Parc Ela                                                           85746
## Naturpark Gantrisch                                                45875
## Parc naturel régional de la Vallée du Trient                        8968
## Naturpark Pfyn-Finges                                              12010
## Landschaftspark Binntal                                            11642
## Naturpark Thal                                                      7640
## Parc régional Chasseral                                            20236
## UNESCO Biosphäre Entlebuch                                         42712
## Biosfera Val Müstair                                                3375
## Parc du Doubs                                                      13977
## Parco Val Calanca                                                   1226
## Naturpark Beverin                                                  80323
## Regionaler Naturpark Schaffhausen                                   4144
## Naturpark Diemtigtal                                                5295
## Parc naturel périurbain du Jorat                                     292
## Wildnispark Zürich Sihlwald                                            0
melted_congruency  <- melt(parks_congruency, value.name = "Coocurrences", varnames=c('Park1', 'Park2')) #Edgelist of parks
head(melted_congruency,10)
##                                           Park1           Park2 Coocurrences
## 1                               Jurapark Aargau Jurapark Aargau            0
## 2   Parc naturel régional Gruyère Pays-d’Enhaut Jurapark Aargau       326989
## 3                             Parc Jura vaudois Jurapark Aargau       288978
## 4                  Schweizerischer Nationalpark Jurapark Aargau        18934
## 5                                      Parc Ela Jurapark Aargau       253551
## 6                           Naturpark Gantrisch Jurapark Aargau       193419
## 7  Parc naturel régional de la Vallée du Trient Jurapark Aargau        59763
## 8                         Naturpark Pfyn-Finges Jurapark Aargau        95073
## 9                       Landschaftspark Binntal Jurapark Aargau        28349
## 10                               Naturpark Thal Jurapark Aargau        76947
base_size <- 9 #size of the labels for the ggplot

#Heatmap of the coocurrences between Parks 
ggplot(melted_congruency, aes(x=Park1, y=Park2, fill= Coocurrences)) +
  geom_tile(color = "black") +
  scale_fill_gradient(low = "white", high = "red") +
  coord_fixed()+
  theme_grey(base_size = base_size) + 
  labs(x = "", y = "") + 
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) + 
  theme(legend.position = "right",
       axis.text.x = element_text(size = base_size * 0.8, 
                                angle = 90, 
                                hjust = 0, colour ="grey50"))

###6.4 Fitting the model

#To interpret correspondence analysis, the first step is to evaluate whether there is a significant dependency between the rows and columns. 
#A rigorous method is to use the chi-square statistic for examining the association between row and column variables

#Fitting the data avoiding the null values, and with a correlation reduce the extreme values
fit <- princomp(na.omit(parks_congruency), cor = TRUE)
fit
## Call:
## princomp(x = na.omit(parks_congruency), cor = TRUE)
## 
## Standard deviations:
##           Comp.1           Comp.2           Comp.3           Comp.4 
## 3.74908461577732 1.43925173377828 1.09942224719448 0.79698115535258 
##           Comp.5           Comp.6           Comp.7           Comp.8 
## 0.76480103005762 0.67632060954672 0.60027421303011 0.42853294360283 
##           Comp.9          Comp.10          Comp.11          Comp.12 
## 0.36927184288334 0.35537833705394 0.28318233527726 0.18560299783339 
##          Comp.13          Comp.14          Comp.15          Comp.16 
## 0.17482937686407 0.14724592470794 0.08885308870610 0.05274692466407 
##          Comp.17          Comp.18          Comp.19          Comp.20 
## 0.04361403623870 0.02402013785265 0.00344321145989 0.00000002172407 
## 
##  20  variables and  20 observations.
#Graph to find the breaks
plot(fit,type="lines", main = "Correlation of terms between Parks")

###6.5 Validation of the model

#Measure the relation between the parks with Chi2
chisq <- chisq.test(parks_congruency)
chisq
## 
##  Pearson's Chi-squared test
## 
## data:  parks_congruency
## X-squared = 22791483, df = 361, p-value < 2.2e-16

###6.7 Application of the correspondence analysis method

#the method CA is always applied when there is correlation
res.ca <- CA(parks_congruency,  graph = FALSE)
summary(res.ca)
## 
## Call:
## CA(X = parks_congruency, graph = FALSE) 
## 
## The chi square of independence between the two variables is equal to 22791483 (p-value =  0 ).
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance               0.103   0.072   0.036   0.027   0.025   0.014   0.009
## % of var.             33.180  23.133  11.533   8.720   8.093   4.383   2.766
## Cumulative % of var.  33.180  56.312  67.845  76.565  84.658  89.041  91.807
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               0.008   0.007   0.004   0.002   0.002   0.002   0.001
## % of var.              2.532   2.103   1.211   0.747   0.690   0.574   0.162
## Cumulative % of var.  94.338  96.442  97.653  98.400  99.090  99.664  99.826
##                       Dim.15  Dim.16  Dim.17  Dim.18  Dim.19
## Variance               0.000   0.000   0.000   0.000   0.000
## % of var.              0.104   0.029   0.024   0.016   0.000
## Cumulative % of var.  99.930  99.960  99.983 100.000 100.000
## 
## Rows (the 10 first)
##                                                Iner*1000    Dim.1    ctr   cos2
## Jurapark Aargau                              |     3.285 |  0.022  0.015  0.005
## Parc naturel régional Gruyère Pays-d’Enhaut  |    41.278 |  0.442 30.413  0.762
## Parc Jura vaudois                            |    66.629 | -0.634 46.557  0.722
## Schweizerischer Nationalpark                 |     1.917 | -0.105  0.126  0.068
## Parc Ela                                     |    29.152 | -0.143  1.837  0.065
## Naturpark Gantrisch                          |    16.781 |  0.005  0.003  0.000
## Parc naturel régional de la Vallée du Trient |     6.781 | -0.136  0.747  0.114
## Naturpark Pfyn-Finges                        |     7.813 | -0.070  0.226  0.030
## Landschaftspark Binntal                      |     3.381 | -0.106  0.226  0.069
## Naturpark Thal                               |     8.415 | -0.179  0.986  0.121
##                                                 Dim.2    ctr   cos2    Dim.3
## Jurapark Aargau                              | -0.025  0.026  0.006 | -0.007
## Parc naturel régional Gruyère Pays-d’Enhaut  | -0.045  0.446  0.008 |  0.073
## Parc Jura vaudois                            | -0.351 20.439  0.221 |  0.104
## Schweizerischer Nationalpark                 |  0.289  1.375  0.517 |  0.009
## Parc Ela                                     |  0.262  8.828  0.218 | -0.433
## Naturpark Gantrisch                          |  0.110  1.670  0.072 | -0.031
## Parc naturel régional de la Vallée du Trient |  0.169  1.673  0.178 | -0.015
## Naturpark Pfyn-Finges                        |  0.226  3.394  0.313 |  0.016
## Landschaftspark Binntal                      |  0.254  1.853  0.395 |  0.026
## Naturpark Thal                               | -0.371  6.090  0.522 | -0.026
##                                                 ctr   cos2  
## Jurapark Aargau                               0.004  0.000 |
## Parc naturel régional Gruyère Pays-d’Enhaut   2.360  0.021 |
## Parc Jura vaudois                             3.614  0.019 |
## Schweizerischer Nationalpark                  0.003  0.000 |
## Parc Ela                                     48.352  0.596 |
## Naturpark Gantrisch                           0.270  0.006 |
## Parc naturel régional de la Vallée du Trient  0.027  0.001 |
## Naturpark Pfyn-Finges                         0.033  0.002 |
## Landschaftspark Binntal                       0.040  0.004 |
## Naturpark Thal                                0.058  0.002 |
## 
## Columns (the 10 first)
##                                                Iner*1000    Dim.1    ctr   cos2
## Jurapark Aargau                              |     3.285 | -0.022  0.015  0.005
## Parc naturel régional Gruyère Pays-d’Enhaut  |    41.278 | -0.442 30.413  0.762
## Parc Jura vaudois                            |    66.629 |  0.634 46.557  0.722
## Schweizerischer Nationalpark                 |     1.917 |  0.105  0.126  0.068
## Parc Ela                                     |    29.152 |  0.143  1.837  0.065
## Naturpark Gantrisch                          |    16.781 | -0.005  0.003  0.000
## Parc naturel régional de la Vallée du Trient |     6.781 |  0.136  0.747  0.114
## Naturpark Pfyn-Finges                        |     7.813 |  0.070  0.226  0.030
## Landschaftspark Binntal                      |     3.381 |  0.106  0.226  0.069
## Naturpark Thal                               |     8.415 |  0.179  0.986  0.121
##                                                 Dim.2    ctr   cos2    Dim.3
## Jurapark Aargau                              | -0.025  0.026  0.006 |  0.007
## Parc naturel régional Gruyère Pays-d’Enhaut  | -0.045  0.446  0.008 | -0.073
## Parc Jura vaudois                            | -0.351 20.439  0.221 | -0.104
## Schweizerischer Nationalpark                 |  0.289  1.375  0.517 | -0.009
## Parc Ela                                     |  0.262  8.828  0.218 |  0.433
## Naturpark Gantrisch                          |  0.110  1.670  0.072 |  0.031
## Parc naturel régional de la Vallée du Trient |  0.169  1.673  0.178 |  0.015
## Naturpark Pfyn-Finges                        |  0.226  3.394  0.313 | -0.016
## Landschaftspark Binntal                      |  0.254  1.853  0.395 | -0.026
## Naturpark Thal                               | -0.371  6.090  0.522 |  0.026
##                                                 ctr   cos2  
## Jurapark Aargau                               0.004  0.000 |
## Parc naturel régional Gruyère Pays-d’Enhaut   2.360  0.021 |
## Parc Jura vaudois                             3.614  0.019 |
## Schweizerischer Nationalpark                  0.003  0.000 |
## Parc Ela                                     48.352  0.596 |
## Naturpark Gantrisch                           0.270  0.006 |
## Parc naturel régional de la Vallée du Trient  0.027  0.001 |
## Naturpark Pfyn-Finges                         0.033  0.002 |
## Landschaftspark Binntal                       0.040  0.004 |
## Naturpark Thal                                0.058  0.002 |

###6.8 Calculation of the eigen values

#Calculation of the dimensions (Park) mean value
eig.val <- get_eigenvalue (res.ca)
eig.val
##            eigenvalue variance.percent cumulative.variance.percent
## Dim.1  0.103375010817    33.1795804543                    33.17958
## Dim.2  0.072072480385    23.1326182466                    56.31220
## Dim.3  0.035931008923    11.5325337522                    67.84473
## Dim.4  0.027168559449     8.7201094050                    76.56484
## Dim.5  0.025213842341     8.0927170301                    84.65756
## Dim.6  0.013656477476     4.3832275281                    89.04079
## Dim.7  0.008617470371     2.7658913817                    91.80668
## Dim.8  0.007887989982     2.5317549781                    94.33843
## Dim.9  0.006552761289     2.1031956241                    96.44163
## Dim.10 0.003774072406     1.2113385823                    97.65297
## Dim.11 0.002326489777     0.7467177426                    98.39968
## Dim.12 0.002149925393     0.6900470625                    99.08973
## Dim.13 0.001789479121     0.5743570521                    99.66409
## Dim.14 0.000505604670     0.1622805231                    99.82637
## Dim.15 0.000323681527     0.1038898780                    99.93026
## Dim.16 0.000091821403     0.0294712967                    99.95973
## Dim.17 0.000073960919     0.0237387375                    99.98347
## Dim.18 0.000050085307     0.0160755434                    99.99954
## Dim.19 0.000001418173     0.0004551815                   100.00000

###6.9 Visualization of the correspondence analysis

#Graph of the alignment of the Parks, where the column profiles must be presented in row space or vice-versa.
#It is used the row eigen values to draw, and in the color (clusters) the grade of contribution
#The contribution is how much the Park is related with the represented factor (display of Dim1 and Dim2).
fviz_ca_row(res.ca, col.row= "contrib", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), 
            repel = TRUE,
            title = "Correspondence Analysis of Parks (Contribution of Row)",
            shape.row = 2, 
            labelsize = 3,
            pointsize = 2)

###6.10 Visualization of the correlation between dimensions

#Factorial contribution by dimensions per Park
corrplot(res.ca$row$cos2, is.corr=TRUE, 
         tl.col = "black", 
         cl.align.text = "l", 
         tl.cex=0.9, 
         main="Correspondence of Parks by dimensions",
         col=COL1('YlOrBr', 200))

# Visualize row contributions on axes 1 in Dim1
fviz_contrib(res.ca, choice ="row", axes = 1)

# Visualize row contributions on axes 1 in Dim2
fviz_contrib(res.ca, choice ="row", axes = 2)

#Export the summary of the analysis
write.infile(res.ca, "ca.csv", sep = ";")